library(dplyr)
library(ggplot2)
library(biomaRt)
library(org.Hs.eg.db)
library(clusterProfiler)
library(pheatmap)
library(DT)
library(dendextend)
library(dendsort)
library(ggplotify)
library(gridExtra)
library(reshape2)
library(Mfuzz)
library(enrichplot)
leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv <- read.table(
file='leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv',
sep='\t'
)
leafcutterAStable_human_cortex_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv <- read.table(
file='leafcutterAStable_human_cortex_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv',
sep='\t'
)
leafcutterAStable_human_striatum_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv <- read.table(
file='leafcutterAStable_human_striatum_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv',
sep='\t'
)
options(repr.plot.height=6,repr.plot.width=6)
ggvenn::ggvenn(
data=list(
'IsoHD'=leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(DiffSplicing.test!='Cell_type_NPCvsESC') %>%
with(gsub('-[0-9]/[0-9]$','',EVENT) %>% na.omit %>% unique),
'Cortex HD'=leafcutterAStable_human_cortex_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(gsub('-[0-9]/[0-9]$','',EVENT) %>% na.omit %>% unique),
'Striatum HD'=leafcutterAStable_human_striatum_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(gsub('-[0-9]/[0-9]$','',EVENT) %>% na.omit %>% unique)
),
fill_color=c("blue","yellow","green", "red"),
show_percentage=FALSE,
set_name_size=10,
text_size=10
)
list(
'IsoHD'=leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(DiffSplicing.test!='Cell_type_NPCvsESC') %>%
with(JunctionName %>% na.omit %>% unique %>% sort),
'Cortex HD'=leafcutterAStable_human_cortex_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(JunctionName %>% na.omit %>% unique %>% sort),
'Striatum HD'=leafcutterAStable_human_striatum_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(JunctionName %>% na.omit %>% unique %>% sort)
) %>%
lapply(length)
leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents.tsv %>%
subset(DiffSplicing.test!='Cell_type_NPCvsESC' & (EVENT %in% (Reduce(intersect,list(
'IsoHD'=leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(DiffSplicing.test!='Cell_type_NPCvsESC') %>%
with(EVENT %>% na.omit %>% unique),
'Cortex HD'=leafcutterAStable_human_cortex_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(EVENT %>% na.omit %>% unique),
'Striatum HD'=leafcutterAStable_human_striatum_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(EVENT %>% na.omit %>% unique)
))))) %>%
with(genes %>% unique %>% sort)
intersect(
leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(grepl('_disease_stage_',DiffSplicing.test)) %>%
with(gsub('-[0-9]/[0-9]$','',EVENT)) %>% na.omit %>% unique,
leafcutterAStable_human_striatum_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(gsub('-[0-9]/[0-9]$','',EVENT)) %>% na.omit %>% unique
) %>% unique %>% length
intersect(
leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(grepl('_disease_stage_',DiffSplicing.test)) %>%
with(gsub('-[0-9]/[0-9]$','',EVENT)) %>% na.omit %>% unique,
leafcutterAStable_human_cortex_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(gsub('-[0-9]/[0-9]$','',EVENT)) %>% na.omit %>% unique
) %>% unique %>% length
intersect(
leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(grepl('_disease_stage_',DiffSplicing.test)) %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique,
leafcutterAStable_human_striatum_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique
) %>% unique %>% length
intersect(
leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(grepl('_disease_stage_',DiffSplicing.test)) %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique,
leafcutterAStable_human_cortex_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique
) %>% unique %>% length
c(
intersect(
leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(grepl('_disease_stage_',DiffSplicing.test)) %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique,
leafcutterAStable_human_striatum_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique
),
intersect(
leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(grepl('_disease_stage_',DiffSplicing.test)) %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique,
leafcutterAStable_human_cortex_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique
)
) %>% unique %>% length
isohdcortexstriatum.alljunctions.naomit <- c(
intersect(
leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(grepl('_disease_stage_',DiffSplicing.test)) %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique,
leafcutterAStable_human_striatum_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique
),
intersect(
leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(grepl('_disease_stage_',DiffSplicing.test)) %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique,
leafcutterAStable_human_cortex_HD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
with(gsub('clu_[0-9]*_','',JunctionName)) %>% unique
)
) %>% unique %>%
(function(juncnames) {
Reduce(f=intersect,x=list(
readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
juncnames,
coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('hESC',Celltype)) %>% row.names
],
readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
juncnames,
coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('NPC',Celltype)) %>% row.names
],
readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
juncnames,
coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('Neuron',Celltype)) %>% row.names
],
readsunique.novel.human_cortex_hd_perind_logit.constcounts.known2[juncnames,],
readsunique.novel.human_striatum_hd_perind_logit.constcounts.known2[juncnames,]
) %>%
lapply(function(x) {
x %>%
t %>% scale %>% t %>%
na.omit %>%
row.names
}))
}) %>%
unlist %>% unique
isohdcortexstriatum.alljunctions.naomit %>% length
isohdcortexstriatum.Dmin <- cbind(
readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(Celltype=='hESC') %>% row.names
] %>%
t %>% scale %>% t,
readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(Celltype=='NPC') %>% row.names
] %>%
t %>% scale %>% t,
readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(Celltype=='Neuron') %>% row.names
] %>%
t %>% scale %>% t
) %>%
ExpressionSet(
phenoData=coldata_ESCNPCNeu_isoHD2[
c(coldata_ESCNPCNeu_isoHD2 %>% subset(Celltype=='hESC') %>% row.names,coldata_ESCNPCNeu_isoHD2[-c(8,14,21),] %>% subset(Celltype=='NPC') %>% row.names,coldata_ESCNPCNeu_isoHD2[-c(8,14,21),] %>% subset(Celltype=='Neuron') %>% row.names),
] %>% AnnotatedDataFrame
) %>%
(function(myexprset) {
myexprset %>%
Dmin(
m=myexprset %>% mestimate,
crange=2:10,
repeats=5,
visu=FALSE
)
})
options(repr.plot.width=10,repr.plot.height=5)
ggplot(
mapping=aes(
x=2:10,
y=isohdcortexstriatum.Dmin
)
) +
geom_point() +
geom_line() +
geom_vline(xintercept=4,color='red',linetype='dashed') +
labs(x='Num of clusters',y='Dmin') +
theme_bw(base_size=16)
isohdcortexstriatum.mfuzz.list <- c(2:5) %>%
lapply(function(x) {
myexprset <- cbind(
readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(Celltype=='hESC') %>% row.names
] %>%
t %>% scale %>% t,
readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(Celltype=='NPC') %>% row.names
] %>%
t %>% scale %>% t,
readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(Celltype=='Neuron') %>% row.names
] %>%
t %>% scale %>% t
) %>%
ExpressionSet(
phenoData=coldata_ESCNPCNeu_isoHD2[
c(coldata_ESCNPCNeu_isoHD2 %>% subset(Celltype=='hESC') %>% row.names,coldata_ESCNPCNeu_isoHD2[-c(8,14,21),] %>% subset(Celltype=='NPC') %>% row.names,coldata_ESCNPCNeu_isoHD2[-c(8,14,21),] %>% subset(Celltype=='Neuron') %>% row.names),
] %>% AnnotatedDataFrame
)
myexprset %>%
mfuzz(centers=x,m=myexprset %>% mestimate)
}) %>% setNames(nm=c(2:5))
boxplotglist <- list()
boxplotglist[['hesc']] <- readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('ESC',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
subset(membership>=.6) %>%
ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('ESC',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('ESC',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names)
),
y=value
)
) +
geom_boxplot(outlier.alpha=0) +
geom_hline(yintercept=0,linetype='dashed') +
facet_grid(
facets=cluster~'isoHD: hESC',
labeller=labeller(cluster=c('1'='Cluster 1','2'='Cluster 2','3'='Cluster 3','4'='Cluster 4','5'='Cluster 5'))
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5)) +
labs(x='',y='Scaled PSI')
boxplotglist[['npc']] <- readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('NPC',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
subset(membership>=.6) %>%
ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('NPC',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('NPC',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names)
),
y=value
)
) +
geom_boxplot(outlier.alpha=0) +
geom_hline(yintercept=0,linetype='dashed') +
facet_grid(
facets=cluster~'isoHD: NPC',
labeller=labeller(cluster=c('1'='Cluster 1','2'='Cluster 2','3'='Cluster 3','4'='Cluster 4','5'='Cluster 5'))
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5)) +
labs(x='',y='Scaled PSI')
boxplotglist[['neuron']] <- readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('Neuron',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
subset(membership>=.6) %>%
ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('Neuron',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('Neuron',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names)
),
y=value
)
) +
geom_boxplot(outlier.alpha=0) +
geom_hline(yintercept=0,linetype='dashed') +
facet_grid(
facets=cluster~'isoHD: Neuron',
labeller=labeller(cluster=c('1'='Cluster 1','2'='Cluster 2','3'='Cluster 3','4'='Cluster 4','5'='Cluster 5'))
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5)) +
labs(x='',y='Scaled PSI')
boxplotglist[['cortex']] <- readsunique.novel.human_cortex_hd_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_cortex_hd %>% arrange(Diagnosis) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
subset(membership>=.6) %>%
ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_cortex_hd %>% arrange(Diagnosis) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_cortex_hd %>% arrange(Diagnosis) %>% row.names)
),
y=value
)
) +
geom_boxplot(outlier.alpha=0) +
geom_hline(yintercept=0,linetype='dashed') +
facet_grid(
facets=cluster~'Cortex HD',
labeller=labeller(cluster=c('1'='Cluster 1','2'='Cluster 2','3'='Cluster 3','4'='Cluster 4','5'='Cluster 5'))
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5)) +
labs(x='',y='Scaled PSI')
boxplotglist[['striatum']] <- readsunique.novel.human_striatum_hd_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_Striatum_HD %>% arrange(Condition) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
subset(membership>=.6) %>%
ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_Striatum_HD %>% arrange(Condition) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_Striatum_HD %>% arrange(Condition) %>% row.names)
),
y=value
)
) +
geom_boxplot(outlier.alpha=0) +
geom_hline(yintercept=0,linetype='dashed') +
facet_grid(
facets=cluster~'Striatum HD',
labeller=labeller(cluster=c('1'='Cluster 1: 107','2'='Cluster 2: 129','3'='Cluster 3: 129','4'='Cluster 4: 146','5'='Cluster 5'))
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5)) +
labs(x='',y='Scaled PSI')
options(repr.plot.height=10,repr.plot.width=16)
grid.arrange(grobs=boxplotglist[c(1,2,3,4,5)],layout_matrix=matrix(1:5,nrow=1))
fuzzyglist <- list()
fuzzyglist[['hesc']] <- readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('ESC',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
arrange(membership) %>%
(function(mydf) {
mydf$Var1 <- mydf$Var1 %>% as.character %>% factor(levels=mydf$Var1 %>% as.character %>% unique)
mydf
}) %>%
subset(membership>=.6) %>%
(function(mydf) {
mydf %>% ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('ESC',Celltype)) %>% arrange(Qlength) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('ESC',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names)
),
y=value,
group=Var1,
color=membership
)
) +
geom_line(alpha=.8) +
geom_line(
data=mydf %>%
group_by(Var2,cluster) %>%
summarise(meanvalue=value %>% mean),
mapping=aes(y=meanvalue,group='mean'),
color='black',size=2
) +
geom_hline(yintercept=0,linetype='dashed',size=.5) +
scale_color_gradientn(guide=FALSE,
colours=colorRampPalette(colors=c('yellow','green','blue','purple','red'))(255),
limits=c(.6,1),
na.value=NA
) +
facet_grid(
facets=cluster~'isoHD: hESC',
labeller=labeller(cluster=c('1'='Cluster 1','2'='Cluster 2','3'='Cluster 3','4'='Cluster 4','5'='Cluster 5'))
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5)) +
labs(x='',y='Scaled PSI')
})
fuzzyglist[['npc']] <- readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('NPC',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
arrange(membership) %>%
(function(mydf) {
mydf$Var1 <- mydf$Var1 %>% as.character %>% factor(levels=mydf$Var1 %>% as.character %>% unique)
mydf
}) %>%
subset(membership>=.6) %>%
(function(mydf) {
mydf %>% ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('NPC',Celltype)) %>% arrange(Qlength) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('NPC',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names)
),
y=value,
group=Var1,
color=membership
)
) +
geom_line(alpha=.8) +
geom_line(
data=mydf %>%
group_by(Var2,cluster) %>%
summarise(meanvalue=value %>% mean),
mapping=aes(y=meanvalue,group='mean'),
color='black',size=2
) +
geom_hline(yintercept=0,linetype='dashed',size=.5) +
scale_color_gradientn(guide=FALSE,
colours=colorRampPalette(colors=c('yellow','green','blue','purple','red'))(255),
limits=c(.6,1),
na.value=NA
) +
facet_grid(
facets=cluster~'isoHD: NPC',
labeller=labeller(cluster=c('1'='Cluster 1','2'='Cluster 2','3'='Cluster 3','4'='Cluster 4','5'='Cluster 5'))
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5)) +
labs(x='',y='Scaled PSI')
})
fuzzyglist[['neuron']] <- readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('Neuron',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
arrange(membership) %>%
(function(mydf) {
mydf$Var1 <- mydf$Var1 %>% as.character %>% factor(levels=mydf$Var1 %>% as.character %>% unique)
mydf
}) %>%
subset(membership>=.6) %>%
(function(mydf) {
mydf %>%
ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('Neuron',Celltype)) %>% arrange(Qlength) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_ESCNPCNeu_isoHD2 %>% subset(grepl('Neuron',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names)
),
y=value,
group=Var1,
color=membership
)
) +
geom_line(alpha=.8) +
geom_line(
data=mydf %>%
group_by(Var2,cluster) %>%
summarise(meanvalue=value %>% mean),
mapping=aes(y=meanvalue,group='mean'),
color='black',size=2
) +
geom_hline(yintercept=0,linetype='dashed',size=.5) +
scale_color_gradientn(guide=FALSE,
colours=colorRampPalette(colors=c('yellow','green','blue','purple','red'))(255),
limits=c(.6,1),
na.value=NA
) +
facet_grid(
facets=cluster~'isoHD: Neuron',
labeller=labeller(cluster=c('1'='Cluster 1: 107','2'='Cluster 2: 129','3'='Cluster 3: 129','4'='Cluster 4: 146','5'='Cluster 5'))
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5),legend.title=element_text(size=12),legend.text=element_text(size=10)) +
labs(x='',y='Scaled PSI')
})
fuzzyglist[['cortex']] <- readsunique.novel.human_cortex_hd_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_cortex_hd %>% arrange(Diagnosis) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
arrange(membership) %>%
(function(mydf) {
mydf$Var1 <- mydf$Var1 %>% as.character %>% factor(levels=mydf$Var1 %>% as.character %>% unique)
mydf
}) %>%
subset(membership>=.6) %>%
(function(mydf) {
mydf %>% ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_cortex_hd %>% arrange(Diagnosis) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_cortex_hd %>% arrange(Diagnosis) %>% row.names)
),
y=value,
group=Var1,
color=membership
)
) +
geom_line(color='gray20',alpha=.9) +
geom_line(
data=mydf %>%
group_by(Var2,cluster) %>%
summarise(meanvalue=value %>% mean),
mapping=aes(y=meanvalue,group='mean'),
color='black',size=2
) +
geom_hline(yintercept=0,linetype='dashed',size=.5) +
scale_color_gradientn(guide=FALSE,
colours=colorRampPalette(colors=c('yellow','green','blue','purple','red'))(255),
limits=c(.6,1),
na.value=NA
) +
facet_grid(
facets=cluster~'Cortex HD',
labeller=labeller(cluster=c('1'='Cluster 1','2'='Cluster 2','3'='Cluster 3','4'='Cluster 4','5'='Cluster 5'))
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5)) +
labs(x='',y='Scaled PSI')
})
fuzzyglist[['striatum']] <- readsunique.novel.human_striatum_hd_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_Striatum_HD %>% arrange(Condition) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
arrange(membership) %>%
(function(mydf) {
mydf$Var1 <- mydf$Var1 %>% as.character %>% factor(levels=mydf$Var1 %>% as.character %>% unique)
mydf
}) %>%
subset(membership>=.6) %>%
(function(mydf) {
mydf %>% ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_Striatum_HD %>% arrange(Condition) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_Striatum_HD %>% arrange(Condition) %>% row.names)
),
y=value,
group=Var1,
color=membership
)
) +
geom_line(color='gray20',alpha=.8) +
geom_line(
data=mydf %>%
group_by(Var2,cluster) %>%
summarise(meanvalue=value %>% mean),
mapping=aes(y=meanvalue,group='mean'),
color='black',size=2
) +
geom_hline(yintercept=0,linetype='dashed',size=.5) +
scale_color_gradientn(guide=FALSE,
colours=colorRampPalette(colors=c('yellow','green','blue','purple','red'))(255),
limits=c(.6,1),
na.value=NA
) +
facet_grid(
facets=cluster~'Striatum HD',
labeller=labeller(cluster=c('1'='Cluster 1','2'='Cluster 2','3'='Cluster 3','4'='Cluster 4','5'='Cluster 5')),
drop=FALSE
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5)) +
labs(x='',y='Scaled PSI')
})
options(repr.plot.height=10,repr.plot.width=17)
grid.arrange(grobs=fuzzyglist[c(1,2,3,4,5)],layout_matrix=matrix(c(1,1,1,2,2,2,3,3,3,4,4,4,5,5,5),nrow=1))
options(repr.plot.height=10,repr.plot.width=17)
grid.arrange(grobs=append(fuzzyglist[c(1,2,3)],boxplotglist[c(4,5)]),layout_matrix=matrix(c(1,1,1,2,2,2,3,3,3,4,4,4,5,5,5),nrow=1))
options(repr.plot.height=10,repr.plot.width=5)
fuzzylegend <- (
readsunique.novel.human_ESCNPCNeu_isoHD_perind_logit.constcounts.known2[
isohdcortexstriatum.alljunctions.naomit,
coldata_ESCNPCNeu_isoHD2[-c(8,14,21),] %>% subset(grepl('Neuron',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names
] %>%
t %>% scale %>% t %>%
melt %>%
(function(mydf) {
mydf %>%
data.frame(
cluster=isohdcortexstriatum.mfuzz.list[['4']]$cluster[mydf$Var1] %>% factor,
membership=isohdcortexstriatum.alljunctions.naomit.notstable.membership[mydf$Var1]
)
}) %>%
arrange(membership) %>%
(function(mydf) {
mydf$Var1 <- mydf$Var1 %>% as.character %>% factor(levels=mydf$Var1 %>% as.character %>% unique)
mydf
}) %>%
subset(membership>=.6) %>%
(function(mydf) {
mydf %>%
ggplot(
mapping=aes(
x=Var2 %>% factor(
levels=coldata_ESCNPCNeu_isoHD2[-c(8,14,21),] %>% subset(grepl('Neuron',Celltype)) %>% arrange(Qlength) %>% row.names,
labels=gsub('readsunique.novel.|.bed','',coldata_ESCNPCNeu_isoHD2[-c(8,14,21),] %>% subset(grepl('Neuron',Celltype)) %>% arrange(Qlength,Replicate) %>% row.names)
),
y=value,
group=Var1,
color=membership
)
) +
geom_line(alpha=.8) +
geom_line(
data=mydf %>%
group_by(Var2,cluster) %>%
summarise(meanvalue=value %>% mean),
mapping=aes(y=meanvalue,group='mean'),
color='black',size=2
) +
geom_hline(yintercept=0,linetype='dashed',size=.5) +
scale_color_gradientn(
colours=colorRampPalette(colors=c('yellow','green','blue','purple','red'))(255),
limits=c(.6,1),
na.value=NA
) +
facet_grid(
facets=cluster~'isoHD: Neuron',
labeller=labeller(cluster=c('1'='Cluster 1','2'='Cluster 2','3'='Cluster 3','4'='Cluster 4','5'='Cluster 5'))
) +
theme_bw(base_size=16) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5),legend.title=element_text(size=12),legend.text=element_text(size=10)) +
labs(x='',y='Scaled PSI')
})
) %>%
ggplot_build %>%
ggplot_gtable %>%
with(grobs) %>%
(function(groblist) {
groblist[[((groblist %>% sapply(with,name))=='guide-box') %>% which]]
})
options(repr.plot.height=10,repr.plot.width=18)
grid.arrange(
fuzzyglist[[1]],fuzzyglist[[2]],fuzzyglist[[3]],
fuzzylegend,
boxplotglist[[4]],boxplotglist[[5]],
layout_matrix=matrix(c(1,1,1,2,2,2,3,3,3,4,5,5,5,6,6),nrow=1)
)
fuzzygroblist <- fuzzyglist %>% lapply(ggplotGrob)
boxplotgroblist <- boxplotglist %>% lapply(ggplotGrob)
fuzzygroblist <- fuzzygroblist %>% lapply(function(mygrob) {
mygrob$heights <- boxplotgroblist[[1]]$heights
mygrob
})
boxplotgroblist <- boxplotgroblist %>% lapply(function(mygrob) {
mygrob$heights <- boxplotgroblist[[1]]$heights
mygrob
})
#options(repr.plot.height=10,repr.plot.width=18)
grid.arrange(
fuzzygroblist[[1]],fuzzygroblist[[2]],fuzzygroblist[[3]],
fuzzylegend,
boxplotgroblist[[4]],boxplotgroblist[[5]],
layout_matrix=matrix(c(1,1,1,2,2,2,3,3,3,4,5,5,5,6,6),nrow=1)
)
geneidsuniverse <- hg38samtags %>%
subset(gene_name %in% (
filtered.human_ESCNPCNeu_isoHD.leafcutter_highlyused.novel.tsv %>% with(genes) %>% unique %>% strsplit(split=',') %>% unlist %>% unique
)) %>%
with(gene_id) %>%
unique
hs_mart <- useEnsembl(biomart='genes',dataset="hsapiens_gene_ensembl",mirror='asia')
genesuniverseBM <- getBM(
filters = "ensembl_gene_id",
attributes = c('ensembl_gene_id','entrezgene_id','hgnc_symbol','description'),
values = geneidsuniverse,
mart = hs_mart,
useCache = FALSE
)
genesuniverseBM$entrezgene_id <- genesuniverseBM$entrezgene_id %>% as.character
entrezgeneidsuniverse <- genesuniverseBM %>% subset(ensembl_gene_id %in% geneidsuniverse) %>% with(entrezgene_id %>% as.character %>% unique)
isohdcortexstriatum.mfuzz.list.enrichGOBP <- leafcutterAStable_human_ESCNPCNeu_isoHD.robustintronjunctionsintersectlistlongrenameddf.vastdbevents2.tsv %>%
subset(JunctionName %in% (
isohdcortexstriatum.alljunctions.naomit.notstable.membership %>% names %>%
subset(isohdcortexstriatum.alljunctions.naomit.notstable.membership>=.6)
)) %>%
with(ensembl_gene_id) %>% as.character %>% strsplit(split=',') %>% unlist %>% unique %>% sort %>%
enrichGO(
OrgDb=org.Hs.eg.db,
keyType='ENSEMBL',
ont='BP',
pvalueCutoff=1,
qvalueCutoff=1,
universe=genesuniverseBM$ensembl_gene_id
)
isohdcortexstriatum.mfuzz.list.enrichGOBP2 <- isohdcortexstriatum.mfuzz.list.enrichGOBP %>%
pairwise_termsim %>%
simplify(cutoff=0.6,by="p.adjust",select_fun=min)
isohdcortexstriatum.mfuzz.list.enrichGOBP2 %>%
data.frame %>%
subset(qvalue<=.1)
options(repr.plot.height=6,repr.plot.width=8)
g <- isohdcortexstriatum.mfuzz.list.enrichGOBP2 %>% data.frame %>%
mutate(
gene.ratio=((gsub('/.*','',GeneRatio) %>% as.numeric)/(gsub('.*/','',GeneRatio) %>% as.numeric)),
bg.ratio=((gsub('/.*','',BgRatio) %>% as.numeric)/(gsub('.*/','',BgRatio) %>% as.numeric))
) %>%
mutate(
enrichment.score=gene.ratio/bg.ratio
) %>%
head(n=10) %>%
ggplot(
mapping=aes(
x=gene.ratio,
y=Description %>% factor(levels=Description %>% rev),
color=p.adjust,
size=Count
)
) +
geom_point() +
scale_color_gradient(name='adj. P-val',low='red',high='blue',limits=c(0,.1),breaks=c(0,.05,.1)) +
theme_bw(base_size=14) +
guides(color=guide_colorbar(reverse=TRUE)) +
coord_cartesian(xlim=c(0,.1)) +
labs(x='Gene Ratio',y='Top 10 GO terms') +
theme(axis.text.x=element_text(angle=270,vjust=.5))
g
sessionInfo()